Here we have 82 data points from the galaxy dataset, drawn from an unknown 1-dimensional distribution. The aim is to fit a finite mixture of normals with a pre-specified number of latent clusters.
Each chain seems to converge to something, but the mixing across chains is very poor. Is there still a label switching issue even though the means are ordered in the prior?
## Data manipulation and plotting (tidyverse) plus the Stan interface.
library(tidyverse)
library(rstan)
## devtools::install_github('jburos/biostan', build_vignettes = TRUE, dependencies = TRUE)
## library(biostan)
## NOTE(review): DPpackage is used only for its galaxy dataset; it appears to
## have been archived from CRAN -- confirm availability before rerunning.
library(DPpackage)
## Fix the RNG seed so the MCMC runs below are reproducible.
set.seed(732565397)
## Galaxy velocity data (82 observations).
data(galaxy, package = "DPpackage")
## Augment the galaxy data: log speed and speed rescaled to 1000 km/s units.
## as_data_frame() is deprecated in tibble; as_tibble() is its replacement
## (this also silences the deprecation warning shown below).
galaxy <- galaxy %>%
    as_tibble() %>%
    mutate(log_speed = log(speed),
           k_speed = speed / 1000)
## Warning: `as_data_frame()` is deprecated, use `as_tibble()` (but mind the new semantics).
## This warning is displayed once per session.
## Show the augmented dataset.
galaxy
## # A tibble: 82 x 3
## speed log_speed k_speed
## <dbl> <dbl> <dbl>
## 1 9172 9.12 9.17
## 2 9350 9.14 9.35
## 3 9483 9.16 9.48
## 4 9558 9.17 9.56
## 5 9775 9.19 9.78
## 6 10227 9.23 10.2
## 7 10406 9.25 10.4
## 8 16084 9.69 16.1
## 9 16170 9.69 16.2
## 10 18419 9.82 18.4
## # … with 72 more rows
## One-dimensional dot plot of the galaxy speeds (in 1000 km/s units).
## The y axis is purely cosmetic: points sit at y = 0.5 and ticks are hidden.
ggplot(data = galaxy, mapping = aes(x = k_speed)) +
    geom_point(y = 0.5) +
    scale_y_continuous(breaks = NULL, limits = c(0, 1)) +
    theme_bw() +
    theme(strip.background = element_blank(),
          plot.title = element_text(hjust = 0.5),
          legend.key = element_blank(),
          axis.text.x = element_text(angle = 90, vjust = 0.5))
The speed data seem to show some distinct clusters. We will use the following grid for later visualization.
## Grid on which the posterior density draws are evaluated. These must match
## the grid_min / grid_max / grid_length data passed to the Stan models below,
## since plot_draws() reconstructs the same grid from them.
grid_max <- 40
grid_min <- -20
grid_length <- 100
We will start with density estimation with a single normal distribution as a starting point.
\[\begin{align*} y_{i} | (\mu, \sigma^{2}) &\sim N(\mu, \sigma^{2})\\ \\ p(\mu,\sigma^{2}) &= p(\mu|\sigma^{2})p(\sigma^{2})\\ &= N(\mu | m, s^{2}) Inverse-Gamma(\sigma^{2} | \alpha, \beta)\\ &= \left[ \frac{1}{\sqrt{2\pi s^{2}}} \exp \left( - \frac{(\mu - m)^{2}}{2 \times s^{2}} \right) \right] \left[ \frac{\beta^{\alpha}}{\Gamma(\alpha)} (\sigma^{2})^{-\alpha - 1} \exp \left( - \frac{\beta}{\sigma^{2}} \right) \right]\\ &\text{where}\\ m &= 0\\ s^{2} &= 10^{3}\\ \alpha &= 10^{-3}\\ \beta &= 10^{-3}\\ \end{align*} \]
## Load the single-normal Stan model from file (biostan helper).
normal_stan_code <- biostan::read_stan_file("./bayesianideas_density_normal.stan")
## Warning: replacing previous import 'rstan::loo' by 'rstanarm::loo' when loading 'biostan'
## Display the full Stan program for the record.
biostan::print_stan_code(normal_stan_code, section = NULL)
## data {
## // Hyperparameters
## real alpha;
## real beta;
## real m;
## real s_squared;
##
## // Define variables in data
## // Number of observations (an integer)
## int<lower=0> n;
## // Outcome (a real vector of length n)
## real y[n];
##
## // Grid evaluation
## real grid_max;
## real grid_min;
## int<lower=1> grid_length;
## }
##
## transformed data {
## real s;
## real grid_step;
##
## s = sqrt(s_squared);
## grid_step = (grid_max - grid_min) / (grid_length - 1);
## }
##
## parameters {
## // Define parameters to estimate
## // Population mean (a real number)
## real mu;
## // Population variance (a positive real number)
## real<lower=0> sigma_squared;
## }
##
## transformed parameters {
## // Population standard deviation (a positive real number)
## real<lower=0> sigma;
## // Standard deviation (derived from variance)
## sigma = sqrt(sigma_squared);
## }
##
## model {
## // Prior part of Bayesian inference
## // Mean
## mu ~ normal(m, s);
## // sigma^2 has inverse gamma (alpha = 1, beta = 1) prior
## sigma_squared ~ inv_gamma(alpha, beta);
##
## // Likelihood part of Bayesian inference
## // Outcome model N(mu, sigma^2) (use SD rather than Var)
## y ~ normal(mu, sigma);
## }
##
## generated quantities {
##
## real log_f[grid_length];
##
## for (g in 1:grid_length) {
## // Definiting here avoids reporting of these intermediates.
## real grid_value;
## grid_value = grid_min + grid_step * (g - 1);
## log_f[g] = normal_lpdf(grid_value | mu, sigma);
## }
##
## }
##
Helper functions for here and later use.
## Print posterior summaries for the parameters of interest only.
## fit: a stanfit object. pars: character vector of parameter names to show.
print_relevant_pars <- function(fit, pars = c("mu","sigma_squared","Pi","sigma","lp__")) {
  ## Delegates to the stanfit print method, restricted to the chosen pars.
  print(x = fit, pars = pars)
}
## Draw one traceplot (including warmup) per parameter name in pars.
## fit: a stanfit object. pars: character vector of parameter names.
traceplot_all <- function(fit, pars = c("mu","sigma","Pi","lp__")) {
  for (par_name in pars) {
    ## print() forces the ggplot object to render inside the loop
    ## (and in non-interactive use such as knitting).
    print(traceplot(fit, inc_warmup = TRUE, pars = par_name))
  }
}
## Draw one base-graphics pairs plot per parameter name in pars.
## fit: a stanfit object. pars: character vector of parameter names.
pairs_plot_all <- function(fit, pars = c("mu","sigma_squared","Pi")) {
  for (par_name in pars) {
    ## pairs() draws immediately, so no explicit print() is needed.
    pairs(fit, pars = par_name)
  }
}
## Plot posterior draws of the estimated density over the evaluation grid,
## one panel per chain, with the observed data points along the x axis.
##
## stan_fit: a stanfit object whose generated quantities include
##           log_f[grid_length] (log density evaluated on the grid).
##
## NOTE: depends on the global variables grid_min, grid_max, grid_length and
## the galaxy data frame defined earlier in this script; they must match the
## data passed to the Stan model.
plot_draws <- function(stan_fit) {
    ## Same grid step as computed in the Stan transformed data block.
    grid_step <- (grid_max - grid_min) / (grid_length - 1)
    draw_data <- tidybayes::tidy_draws(stan_fit) %>%
        select(.chain, .iteration, .draw, starts_with("log_f")) %>%
        ## pivot_longer() replaces the superseded gather().
        pivot_longer(cols = starts_with("log_f"),
                     names_to = "key", values_to = "value") %>%
        mutate(## "log_f[g]" -> grid index g.
               key = as.integer(gsub("log_f|\\[|\\]", "", key)),
               ## Reconstruct the grid value directly (replaces the fragile
               ## factor-labels round trip); matches grid_value in Stan.
               x = grid_min + grid_step * (key - 1),
               ## Back-transform from log density to density.
               value = exp(value))
    ## Per-chain posterior mean density on the grid.
    summary_density <- draw_data %>%
        group_by(.chain, x) %>%
        summarize(value = mean(value))
    ggplot(data = draw_data, mapping = aes(x = x, y = value,
                                           group = interaction(.chain, .iteration, .draw))) +
        geom_line(data = summary_density, mapping = aes(group = .chain), size = 0.5, color = "gray") +
        ## Observed data shown as points on the x axis (y = 0).
        geom_point(data = galaxy, mapping = aes(x = k_speed, group = NULL), y = 0) +
        facet_wrap(~ .chain) +
        theme_bw() +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
              legend.key = element_blank(),
              plot.title = element_text(hjust = 0.5),
              strip.background = element_blank())
}
## Fit the single-normal model with vague priors; 12 chains to probe mixing.
normal_stan_fit <-
rstan::stan(model_code = normal_stan_code,
data = list(alpha = 10^(-3), beta = 10^(-3),
m = 0, s_squared = 10^(3),
n = nrow(galaxy),
y = galaxy$k_speed,
grid_max = grid_max, grid_min = grid_min, grid_length = grid_length),
chains = 12)
## Posterior summaries for the model parameters and log posterior.
print(normal_stan_fit, pars = c("mu","sigma","lp__"))
## Inference for Stan model: 588758d84b4b479190683080904cb88c.
## 12 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## mu 20.83 0.01 0.52 19.81 20.48 20.83 21.17 21.85 10130 1
## sigma 4.61 0.00 0.36 3.97 4.36 4.59 4.84 5.39 9190 1
## lp__ -166.29 0.01 0.99 -168.93 -166.70 -165.99 -165.57 -165.31 5240 1
##
## Samples were drawn using NUTS(diag_e) at Sat Apr 27 06:22:44 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
The sampling process seems ok. Plot density function draws.
## Density draws from the single-normal fit, one panel per chain.
plot_draws(normal_stan_fit)
Apparently, a single normal distribution model is not a sufficient description of the density.
Now we examine if modeling the distribution as a mixture of several underlying cluster-specific normals better fit the data. Here we assume some fixed number of clusters \(H\).
Let \(z_{i} \in \left\{ 1, \dots, H \right\}\) be the cluster membership latent variable and \(\mathbf{z}_{i} = (I(z_{i}=1),\dots,I(z_{i}=H))^{T}\) be the vector indicator version. We have the following model.
\[\begin{align*} y_{i} | (z_{i}, \mu_{1},\dots,\mu_{H}, \sigma^{2}_{1},\dots,\sigma^{2}_{H}) &\sim N(\mu_{z_{i}}, \sigma^{2}_{z_{i}})\\ \mathbf{z}_{i} | \boldsymbol{\pi} &\sim Multinomial(1, \boldsymbol{\pi})\\ \\ p(\mu_{h},\sigma^{2}_{h}) &= p(\mu_{h}|\sigma^{2}_{h})p(\sigma^{2}_{h})\\ &= N(\mu_{h} | m, s^{2}) Inverse-Gamma(\sigma^{2}_{h} | \alpha, \beta)\\ &= \left[ \frac{1}{\sqrt{2\pi s^{2}}} \exp \left( - \frac{(\mu_{h} - m)^{2}}{2 \times s^{2}} \right) \right] \left[ \frac{\beta^{\alpha}}{\Gamma(\alpha)} (\sigma^{2}_{h})^{-\alpha - 1} \exp \left( - \frac{\beta}{\sigma^{2}_{h}} \right) \right]\\ &\text{where}\\ m &= 0\\ s^{2} &= 10^{3}\\ \alpha &= 10^{-3}\\ \beta &= 10^{-3}\\ \\ p(\boldsymbol{\pi}) &\sim Dirichlet \left( \frac{\alpha}{H}, \dots, \frac{\alpha}{H} \right)\\ \end{align*} \]
Summing out the latent categorical variable \(z_{i}\) results in the following (conditioning on parameters suppressed for simplicity) marginal density. Note the cluster membership latent variable \(z_i\) is not measured; thus, it is a discrete parameter. Stan’s Hamiltonian Monte Carlo (HMC) cannot deal with discrete parameters, so this marginalization step is required for Stan. JAGS, in contrast, allows discrete parameters and accepts the original model above.
\[\begin{align*} p(y) &= \sum^{H}_{z=1} p(y|z) p(z)\\ &= \sum^{H}_{z=1} \pi_{z} p(y|z)\\ &= \sum^{H}_{h=1} \pi_h N(y | \mu_{h}, \sigma^{2}_{h})\\ \end{align*} \]
Another layer of complexity is the label switching issue. That is, the cluster IDs \(\left\{ 1, \dots, H \right\}\) do not specify which cluster corresponds to which ID when the cluster-specific priors for the normal distribution \(p(\mu_{h},\sigma^{2}_{h})\) are all the same. The solution is to somehow make the cluster IDs distinguishable. Ideally, this should be done on a substantive basis, setting a different prior for each hypothesized latent cluster. In the one-dimensional case we have here, a common solution seems to be to constrain \(\mu_{1} \le \mu_{2} \le \dots \le \mu_{H}\).
## Load the finite-mixture Stan model (means NOT ordered in this version).
normal_fixed_mixture_stan_code <- biostan::read_stan_file("./bayesianideas_density_normal_fixed_mixture_unordered.stan")
## Display the full Stan program for the record.
biostan::print_stan_code(normal_fixed_mixture_stan_code, section = NULL)
## data {
## // Hyperparameters
## real alpha;
## real beta;
## real m;
## real s_squared;
## real<lower=0> dirichlet_alpha;
##
## // Define variables in data
## // Number of observations (an integer)
## int<lower=0> n;
## // Outcome (a real vector of length n)
## real y[n];
## // Number of latent clusters
## int<lower=1> H;
##
## // Grid evaluation
## real grid_max;
## real grid_min;
## int<lower=1> grid_length;
## }
##
## transformed data {
## real s;
## real grid_step;
##
## s = sqrt(s_squared);
## grid_step = (grid_max - grid_min) / (grid_length - 1);
## }
##
## parameters {
## // Define parameters to estimate
## // Population mean (a real number)
## vector[H] mu;
## // Population variance (a positive real number)
## real<lower=0> sigma_squared[H];
## // Cluster probability
## simplex[H] Pi;
## }
##
## transformed parameters {
## // Population standard deviation (a positive real number)
## real<lower=0> sigma[H];
## // Standard deviation (derived from variance)
## sigma = sqrt(sigma_squared);
## }
##
## model {
## // Temporary vector for loop use. Need to come first before priors.
## real contributions[H];
##
## // Prior part of Bayesian inference
## // All vectorized
## // Mean
## mu ~ normal(m, s);
## // sigma^2 has inverse gamma (alpha = 1, beta = 1) prior
## sigma_squared ~ inv_gamma(alpha, beta);
## // cluster probability vector
## Pi ~ dirichlet(rep_vector(dirichlet_alpha / H, H));
##
## // Likelihood part of Bayesian inference
## // Outcome model N(mu, sigma^2) (use SD rather than Var)
## for (i in 1:n) {
## // Loop over individuals
##
## for (h in 1:H) {
## // Loop over clusters within each individual
## // Log likelihood contributions log(Pi[h] * N(y[i] | mu[h],sigma[h]))
## contributions[h] = log(Pi[h]) + normal_lpdf(y[i] | mu[h], sigma[h]);
## }
##
## // log(sum(exp(contribution element)))
## target += log_sum_exp(contributions);
##
## }
## }
##
## generated quantities {
##
## real log_f[grid_length];
##
## for (g in 1:grid_length) {
## // Definiting here avoids reporting of these intermediates.
## real contributions[H];
## real grid_value;
##
## grid_value = grid_min + grid_step * (g - 1);
## for (h in 1:H) {
## contributions[h] = log(Pi[h]) + normal_lpdf(grid_value | mu[h], sigma[h]);
## }
##
## log_f[g] = log_sum_exp(contributions);
## }
##
## }
##
Let us try a 6 latent cluster model with otherwise similar vague priors except Dirichlet(5,…,5) to avoid cluster degeneration.
## Fit the H = 6 cluster mixture. dirichlet_alpha = 6*5 makes the Dirichlet
## prior Dirichlet(5, ..., 5) after the alpha/H division inside the model.
normal_fixed_mixture_stan_fit6 <-
rstan::stan(model_code = normal_fixed_mixture_stan_code,
data = list(alpha = 10^(-3), beta = 10^(-3),
m = 0, s_squared = 10^(3),
n = nrow(galaxy),
y = galaxy$k_speed,
dirichlet_alpha = 6*5,
H = 6,
grid_max = grid_max, grid_min = grid_min, grid_length = grid_length),
chains = 12)
## Warning: There were 1425 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: There were 3441 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
## HMC-specific diagnostics (divergences, tree depth, energy).
check_hmc_diagnostics(normal_fixed_mixture_stan_fit6)
##
## Divergences:
##
## Tree depth:
##
## Energy:
## Posterior summaries for the mixture parameters.
print_relevant_pars(normal_fixed_mixture_stan_fit6)
## Inference for Stan model: 68985fd05a725a6fbd3d2ee07905d347.
## 12 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff
## mu[1] 2.040000e+01 1.380000e+00 9.370000e+00 6.48 19.78 22.00 23.16 3.239000e+01 46
## mu[2] 1.588000e+01 3.070000e+00 1.931000e+01 -47.72 10.00 20.46 23.79 3.373000e+01 40
## mu[3] 1.824000e+01 1.960000e+00 8.330000e+00 8.88 9.96 20.19 22.81 3.163000e+01 18
## mu[4] 1.840000e+01 1.050000e+00 1.248000e+01 -5.86 9.92 19.95 22.86 3.282000e+01 140
## mu[5] 1.948000e+01 2.950000e+00 1.161000e+01 -15.78 19.71 21.57 23.60 3.299000e+01 16
## mu[6] 1.863000e+01 1.830000e+00 8.900000e+00 6.77 16.16 20.05 22.74 3.057000e+01 24
## sigma_squared[1] 2.416667e+299 NaN Inf 0.08 0.41 1.55 8.43 7.270808e+93 NaN
## sigma_squared[2] 2.373119e+303 NaN Inf 0.02 0.38 2.06 36.77 2.189092e+234 NaN
## sigma_squared[3] 8.516214e+41 8.503134e+41 9.317753e+43 0.02 0.27 0.85 3.69 8.547000e+01 12008
## sigma_squared[4] 3.410077e+285 NaN Inf 0.07 0.33 0.88 6.03 3.374356e+57 NaN
## sigma_squared[5] 3.286989e+304 NaN Inf 0.08 0.36 1.76 9.56 4.130435e+193 NaN
## sigma_squared[6] 9.329738e+298 NaN Inf 0.00 0.20 0.59 3.64 8.381711e+98 NaN
## Pi[1] 1.900000e-01 1.000000e-02 9.000000e-02 0.04 0.13 0.18 0.26 3.700000e-01 91
## Pi[2] 1.300000e-01 2.000000e-02 9.000000e-02 0.02 0.07 0.12 0.18 3.400000e-01 24
## Pi[3] 1.700000e-01 3.000000e-02 1.000000e-01 0.04 0.10 0.14 0.24 3.600000e-01 14
## Pi[4] 1.800000e-01 3.000000e-02 1.000000e-01 0.04 0.10 0.15 0.27 3.700000e-01 13
## Pi[5] 1.600000e-01 2.000000e-02 1.000000e-01 0.03 0.08 0.14 0.22 3.800000e-01 20
## Pi[6] 1.600000e-01 2.000000e-02 9.000000e-02 0.04 0.08 0.14 0.22 3.500000e-01 14
## sigma[1] 4.766986e+147 4.489461e+147 4.915935e+149 0.29 0.64 1.25 2.90 2.182751e+46 11990
## sigma[2] 9.534330e+149 NaN 4.870737e+151 0.15 0.62 1.43 6.06 1.478345e+117 NaN
## sigma[3] 8.944962e+18 8.634780e+18 9.228285e+20 0.14 0.52 0.92 1.92 9.240000e+00 11422
## sigma[4] 5.330830e+140 5.325445e+140 5.839587e+142 0.27 0.58 0.94 2.46 5.781632e+28 12024
## sigma[5] 5.543089e+150 NaN 1.812233e+152 0.29 0.60 1.33 3.09 4.853280e+96 NaN
## sigma[6] 2.802587e+147 2.786289e+147 3.054461e+149 0.04 0.45 0.77 1.91 2.869078e+49 12018
## lp__ -2.679500e+02 1.070000e+00 4.840000e+00 -276.97 -271.73 -268.10 -263.97 -2.597000e+02 21
## Rhat
## mu[1] 1.15
## mu[2] 1.18
## mu[3] 1.75
## mu[4] 1.13
## mu[5] 1.35
## mu[6] 1.37
## sigma_squared[1] NaN
## sigma_squared[2] NaN
## sigma_squared[3] 1.00
## sigma_squared[4] NaN
## sigma_squared[5] NaN
## sigma_squared[6] NaN
## Pi[1] 1.16
## Pi[2] 1.25
## Pi[3] 1.47
## Pi[4] 1.50
## Pi[5] 1.31
## Pi[6] 1.34
## sigma[1] 1.00
## sigma[2] 1.00
## sigma[3] 1.00
## sigma[4] 1.00
## sigma[5] 1.00
## sigma[6] 1.00
## lp__ 1.29
##
## Samples were drawn using NUTS(diag_e) at Sat Apr 27 06:24:46 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
## Traceplots and pairs plots to diagnose mixing and divergences (H = 6).
traceplot_all(normal_fixed_mixture_stan_fit6)
pairs_plot_all(normal_fixed_mixture_stan_fit6)
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in par(usr = c(usr[1:2], 0, 1.5)): Internal(pretty()): very large range.. corrected
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Density draws for the 6-cluster fit, one panel per chain.
plot_draws(normal_fixed_mixture_stan_fit6)
There were many divergent transitions (red dots in the pairs plots). The cross appearance of the mu pairs plot likely indicates that once a cluster has essentially zero probability, any value of mu is allowed. Interestingly, the resulting density estimates are quite similar.
Let us sanity check with just one cluster.
## Sanity check: H = 1 should reproduce the single-normal fit.
normal_fixed_mixture_stan_fit1 <-
rstan::stan(model_code = normal_fixed_mixture_stan_code,
data = list(alpha = 10^(-3), beta = 10^(-3),
m = 0, s_squared = 10^(3),
n = nrow(galaxy),
y = galaxy$k_speed,
dirichlet_alpha = 1,
H = 1,
grid_max = grid_max, grid_min = grid_min, grid_length = grid_length),
chains = 12)
## Posterior summaries (Pi is degenerate at 1 when H = 1).
print_relevant_pars(normal_fixed_mixture_stan_fit1)
## Inference for Stan model: 68985fd05a725a6fbd3d2ee07905d347.
## 12 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## mu[1] 20.84 0.00 0.51 19.85 20.49 20.84 21.18 21.83 10827 1
## sigma_squared[1] 21.37 0.04 3.45 15.60 18.90 21.05 23.48 28.93 8799 1
## Pi[1] 1.00 NaN 0.00 1.00 1.00 1.00 1.00 1.00 NaN NaN
## sigma[1] 4.61 0.00 0.37 3.95 4.35 4.59 4.85 5.38 8957 1
## lp__ -241.65 0.01 1.01 -244.42 -242.03 -241.34 -240.94 -240.66 5256 1
##
## Samples were drawn using NUTS(diag_e) at Sat Apr 27 06:25:38 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
## Diagnostics and density draws for the H = 1 sanity-check fit.
traceplot_all(normal_fixed_mixture_stan_fit1)
plot_draws(normal_fixed_mixture_stan_fit1)
This gave a similar result to the first single-normal fit, except for the additional \(\pi\) parameter, which can only be 1 in this case.
This model has two latent clusters.
## Fit the H = 2 cluster mixture; dirichlet_alpha = 2*5 gives Dirichlet(5, 5).
normal_fixed_mixture_stan_fit2 <-
rstan::stan(model_code = normal_fixed_mixture_stan_code,
data = list(alpha = 10^(-3), beta = 10^(-3),
m = 0, s_squared = 10^(3),
n = nrow(galaxy),
y = galaxy$k_speed,
dirichlet_alpha = 2*5,
H = 2,
grid_max = grid_max, grid_min = grid_min, grid_length = grid_length),
chains = 12)
## Posterior summaries for the two-cluster parameters.
print_relevant_pars(normal_fixed_mixture_stan_fit2)
## Inference for Stan model: 68985fd05a725a6fbd3d2ee07905d347.
## 12 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## mu[1] 20.50 0.45 1.79 16.09 19.50 21.18 21.69 22.63 16 1.29
## mu[2] 17.94 1.97 4.93 9.47 10.49 20.95 21.39 22.01 6 4.81
## sigma_squared[1] 40.02 13.58 39.91 2.56 6.72 21.86 65.67 131.81 9 1.82
## sigma_squared[2] 20.57 12.79 35.64 0.11 1.69 3.52 11.83 113.58 8 2.13
## Pi[1] 0.55 0.10 0.25 0.20 0.31 0.52 0.81 0.91 6 3.84
## Pi[2] 0.45 0.10 0.25 0.09 0.19 0.48 0.69 0.80 6 3.84
## sigma[1] 5.47 1.21 3.18 1.60 2.59 4.67 8.10 11.48 7 2.72
## sigma[2] 3.19 1.27 3.22 0.34 1.30 1.88 3.34 10.66 6 3.69
## lp__ -232.21 0.62 2.25 -237.45 -233.70 -231.72 -230.39 -229.20 13 1.37
##
## Samples were drawn using NUTS(diag_e) at Sat Apr 27 06:25:57 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
## Diagnostics and density draws for the H = 2 fit.
traceplot_all(normal_fixed_mixture_stan_fit2)
pairs_plot_all(normal_fixed_mixture_stan_fit2)
plot_draws(normal_fixed_mixture_stan_fit2)
No divergent transitions were observed with just two clusters. However, Rhat statistics are high and mixing is poor including lp__.
## Fit the H = 3 cluster mixture; dirichlet_alpha = 3*5 gives Dirichlet(5, 5, 5).
normal_fixed_mixture_stan_fit3 <-
rstan::stan(model_code = normal_fixed_mixture_stan_code,
data = list(alpha = 10^(-3), beta = 10^(-3),
m = 0, s_squared = 10^(3),
n = nrow(galaxy),
y = galaxy$k_speed,
dirichlet_alpha = 3*5,
H = 3,
grid_max = grid_max, grid_min = grid_min, grid_length = grid_length),
chains = 12)
## Warning: There were 16 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: There were 908 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
## Posterior summaries for the three-cluster parameters.
print_relevant_pars(normal_fixed_mixture_stan_fit3)
## Inference for Stan model: 68985fd05a725a6fbd3d2ee07905d347.
## 12 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## mu[1] 17.84 2.44 6.13 9.42 9.83 20.94 21.47 27.78 6 4.54
## mu[2] 20.98 2.26 5.95 9.51 19.88 21.32 24.21 32.82 7 2.76
## mu[3] 17.85 2.87 7.25 9.40 9.75 21.12 23.22 30.82 6 4.18
## sigma_squared[1] 14.20 9.30 29.20 0.11 0.36 3.45 7.10 93.55 10 1.62
## sigma_squared[2] 15.05 6.17 26.85 0.12 0.55 4.02 21.93 75.96 19 1.23
## sigma_squared[3] 12.00 6.03 24.35 0.09 0.27 2.47 16.74 69.54 16 1.31
## Pi[1] 0.38 0.10 0.26 0.07 0.13 0.28 0.66 0.80 7 3.29
## Pi[2] 0.34 0.08 0.23 0.07 0.15 0.26 0.58 0.78 8 2.49
## Pi[3] 0.28 0.08 0.22 0.07 0.12 0.17 0.38 0.77 7 2.73
## sigma[1] 2.63 1.01 2.69 0.33 0.60 1.86 2.66 9.67 7 2.62
## sigma[2] 2.95 0.84 2.52 0.35 0.74 2.00 4.68 8.72 9 1.77
## sigma[3] 2.43 0.87 2.46 0.31 0.52 1.57 4.09 8.34 8 2.08
## lp__ -233.58 0.04 2.32 -238.98 -234.89 -233.21 -231.87 -230.17 2679 1.01
##
## Samples were drawn using NUTS(diag_e) at Sat Apr 27 06:26:34 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
## Diagnostics and density draws for the H = 3 fit.
traceplot_all(normal_fixed_mixture_stan_fit3)
pairs_plot_all(normal_fixed_mixture_stan_fit3)
plot_draws(normal_fixed_mixture_stan_fit3)
Interestingly, lp__ has Rhat 1.00. However, the actual density estimates took on two shapes. One is with two separate peaks and the other is two peaks joined together.
## Fit the H = 4 cluster mixture; dirichlet_alpha = 4*5 gives Dirichlet(5, ..., 5).
normal_fixed_mixture_stan_fit4 <-
rstan::stan(model_code = normal_fixed_mixture_stan_code,
data = list(alpha = 10^(-3), beta = 10^(-3),
m = 0, s_squared = 10^(3),
n = nrow(galaxy),
y = galaxy$k_speed,
dirichlet_alpha = 4*5,
H = 4,
grid_max = grid_max, grid_min = grid_min, grid_length = grid_length),
chains = 12)
## Warning: There were 123 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: There were 927 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
## Posterior summaries for the four-cluster parameters.
print_relevant_pars(normal_fixed_mixture_stan_fit4)
## Inference for Stan model: 68985fd05a725a6fbd3d2ee07905d347.
## 12 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## mu[1] 20.45 2.08 5.41 9.62 19.69 22.05 23.49 28.51 7 3.12
## mu[2] 21.15 1.50 3.93 9.58 19.95 22.26 22.93 26.52 7 2.98
## mu[3] 15.89 2.56 6.35 9.36 9.70 10.89 22.45 24.28 6 7.72
## mu[4] 19.37 2.40 6.08 9.45 14.16 20.01 23.62 26.90 6 4.56
## sigma_squared[1] 18.24 8.36 33.75 0.14 0.43 1.75 30.01 88.13 16 1.27
## sigma_squared[2] 8.75 6.30 21.65 0.14 0.58 1.62 4.56 64.82 12 1.46
## sigma_squared[3] 2.82 2.63 12.34 0.00 0.17 0.33 0.83 34.56 22 1.31
## sigma_squared[4] 14.02 7.04 24.43 0.10 0.31 0.78 21.12 73.68 12 1.50
## Pi[1] 0.24 0.04 0.11 0.08 0.15 0.23 0.33 0.48 10 1.63
## Pi[2] 0.34 0.06 0.16 0.09 0.23 0.32 0.40 0.74 8 2.18
## Pi[3] 0.20 0.05 0.13 0.03 0.10 0.14 0.30 0.49 8 2.16
## Pi[4] 0.22 0.04 0.11 0.07 0.13 0.20 0.31 0.45 9 1.74
## sigma[1] 3.07 1.08 2.96 0.38 0.66 1.32 5.48 9.39 8 2.21
## sigma[2] 2.04 0.78 2.15 0.37 0.76 1.27 2.14 8.05 8 2.26
## sigma[3] 0.97 0.40 1.37 0.05 0.41 0.57 0.91 5.88 12 1.92
## sigma[4] 2.59 0.96 2.70 0.32 0.56 0.88 4.60 8.58 8 2.45
## lp__ -237.94 1.43 4.45 -249.49 -239.43 -236.82 -234.91 -232.51 10 1.64
##
## Samples were drawn using NUTS(diag_e) at Sat Apr 27 06:27:35 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
## Per-chain traceplots and pairwise scatter plots for the H = 4 fit,
## used to inspect mixing and possible multimodality across chains.
traceplot_all(normal_fixed_mixture_stan_fit4)
pairs_plot_all(normal_fixed_mixture_stan_fit4)
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Overlay posterior draws of the estimated mixture density on the data.
plot_draws(normal_fixed_mixture_stan_fit4)
## NOTE(review): launch_shinystan() opens an interactive browser app and
## will block a non-interactive knit; consider wrapping it in
## if (interactive()) { ... } so the document renders unattended.
shinystan::launch_shinystan(normal_fixed_mixture_stan_fit4)
## Refit the finite normal mixture with H = 5 latent clusters (12 chains).
## Hyperparameters match the H = 4 fit; the Dirichlet concentration is
## scaled as 5 per cluster (H * 5 = 25 total).
stan_data_h5 <- list(
  ## Inverse-gamma(alpha, beta) prior on sigma^2
  alpha = 1e-3,
  beta = 1e-3,
  ## Normal(m, s^2) prior on mu
  m = 0,
  s_squared = 1e3,
  ## Observed data
  n = nrow(galaxy),
  y = galaxy$k_speed,
  ## Dirichlet concentration and number of latent clusters
  dirichlet_alpha = 25,
  H = 5,
  ## Grid on which the density is evaluated in generated quantities
  grid_max = grid_max,
  grid_min = grid_min,
  grid_length = grid_length
)
normal_fixed_mixture_stan_fit5 <-
  rstan::stan(model_code = normal_fixed_mixture_stan_code,
              data = stan_data_h5,
              chains = 12)
## Warning: There were 3185 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: There were 944 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
print_relevant_pars(normal_fixed_mixture_stan_fit5)
## Inference for Stan model: 68985fd05a725a6fbd3d2ee07905d347.
## 12 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## mu[1] 1.658000e+01 2.63 1.245000e+01 -17.95 9.70 20.51 23.38 3.223000e+01 22 1.25
## mu[2] 1.634000e+01 3.05 1.827000e+01 -40.76 19.04 21.96 23.08 3.730000e+01 36 1.17
## mu[3] 2.030000e+01 1.67 9.080000e+00 4.66 19.81 22.23 23.11 3.174000e+01 30 1.27
## mu[4] 1.733000e+01 2.22 5.720000e+00 9.42 9.85 19.76 20.46 2.580000e+01 7 3.50
## mu[5] 1.834000e+01 2.50 1.300000e+01 -23.57 19.33 21.24 23.44 3.311000e+01 27 1.21
## sigma_squared[1] 2.220715e+304 NaN Inf 0.10 0.33 2.05 36.03 3.327886e+167 NaN NaN
## sigma_squared[2] 1.219606e+303 NaN Inf 0.08 0.61 2.73 41.14 1.170732e+235 NaN NaN
## sigma_squared[3] 1.111182e+304 NaN Inf 0.10 0.48 1.50 8.56 2.728439e+110 NaN NaN
## sigma_squared[4] 5.720000e+00 2.24 1.129700e+02 0.07 0.24 0.42 0.95 4.036000e+01 2553 1.01
## sigma_squared[5] 1.184024e+304 NaN Inf 0.11 0.42 2.08 34.60 1.858845e+189 NaN NaN
## Pi[1] 1.600000e-01 0.03 1.000000e-01 0.03 0.10 0.14 0.22 3.900000e-01 14 1.45
## Pi[2] 1.900000e-01 0.04 1.200000e-01 0.03 0.08 0.18 0.29 4.300000e-01 11 1.62
## Pi[3] 2.300000e-01 0.03 1.200000e-01 0.04 0.14 0.23 0.32 4.800000e-01 12 1.46
## Pi[4] 2.200000e-01 0.03 1.100000e-01 0.06 0.12 0.21 0.30 4.300000e-01 10 1.61
## Pi[5] 1.900000e-01 0.03 1.000000e-01 0.03 0.11 0.17 0.27 4.100000e-01 14 1.40
## sigma[1] 3.497831e+150 NaN 1.489858e+152 0.31 0.58 1.43 6.00 5.710081e+83 NaN 1.00
## sigma[2] 6.982520e+149 NaN 3.491733e+151 0.29 0.78 1.65 6.41 3.421521e+117 NaN 1.00
## sigma[3] 2.519498e+150 NaN 1.053869e+152 0.32 0.69 1.22 2.93 1.648000e+55 NaN 1.01
## sigma[4] 1.170000e+00 0.28 2.080000e+00 0.27 0.49 0.65 0.97 6.350000e+00 57 1.13
## sigma[5] 2.543309e+150 NaN 1.087877e+152 0.34 0.65 1.44 5.88 4.139767e+94 NaN 1.01
## lp__ -2.538000e+02 1.27 5.130000e+00 -263.93 -257.68 -253.62 -249.37 -2.458100e+02 16 1.47
##
## Samples were drawn using NUTS(diag_e) at Sat Apr 27 06:28:57 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
## Traceplots and pairs plots for the H = 5 fit; the overflowing
## sigma_squared draws above make several panels degenerate.
traceplot_all(normal_fixed_mixture_stan_fit5)
pairs_plot_all(normal_fixed_mixture_stan_fit5)
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in par(usr = c(usr[1:2], 0, 1.5)): Internal(pretty()): very large range.. corrected
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in plot.window(...): Internal(pretty()): very large range.. corrected
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Posterior density draws for the H = 5 fit, for comparison with H = 4.
plot_draws(normal_fixed_mixture_stan_fit5)
The Rhat for lp__ increased, indicating mixing has not improved, and the resulting density estimates appear somewhat different from the previous fit. We next try ordering the component means as a way to break the label-switching symmetry.
## Load the Stan model whose mu is declared ordered[H] (printed below).
## NOTE(review): this reads the same generic file name as the earlier
## unordered model; a dedicated "..._ordered_mu.stan" file (paralleling
## "..._ordered_Pi.stan" and "..._half_cauchy.stan" below) would be less
## error prone -- confirm the file on disk contains the ordered-mu version.
normal_fixed_mixture_ordered_mu_stan_code <- biostan::read_stan_file("./bayesianideas_density_normal_fixed_mixture.stan")
biostan::print_stan_code(normal_fixed_mixture_ordered_mu_stan_code, section = NULL)
## data {
## // Hyperparameters
## real alpha;
## real beta;
## real m;
## real s_squared;
## real<lower=0> dirichlet_alpha;
##
## // Define variables in data
## // Number of observations (an integer)
## int<lower=0> n;
## // Outcome (a real vector of length n)
## real y[n];
## // Number of latent clusters
## int<lower=1> H;
##
## // Grid evaluation
## real grid_max;
## real grid_min;
## int<lower=1> grid_length;
## }
##
## transformed data {
## real s;
## real grid_step;
##
## s = sqrt(s_squared);
## grid_step = (grid_max - grid_min) / (grid_length - 1);
## }
##
## parameters {
## // Define parameters to estimate
## // Population mean (a real number)
## ordered[H] mu;
## // Population variance (a positive real number)
## real<lower=0> sigma_squared[H];
## // Cluster probability
## simplex[H] Pi;
## }
##
## transformed parameters {
## // Population standard deviation (a positive real number)
## real<lower=0> sigma[H];
## // Standard deviation (derived from variance)
## sigma = sqrt(sigma_squared);
## }
##
## model {
## // Temporary vector for loop use. Need to come first before priors.
## real contributions[H];
##
## // Prior part of Bayesian inference
## // All vectorized
## // Mean
## mu ~ normal(m, s);
## // sigma^2 has inverse gamma (alpha = 1, beta = 1) prior
## sigma_squared ~ inv_gamma(alpha, beta);
## // cluster probability vector
## Pi ~ dirichlet(rep_vector(dirichlet_alpha / H, H));
##
## // Likelihood part of Bayesian inference
## // Outcome model N(mu, sigma^2) (use SD rather than Var)
## for (i in 1:n) {
## // Loop over individuals
##
## for (h in 1:H) {
## // Loop over clusters within each individual
## // Log likelihood contributions log(Pi[h] * N(y[i] | mu[h],sigma[h]))
## contributions[h] = log(Pi[h]) + normal_lpdf(y[i] | mu[h], sigma[h]);
## }
##
## // log(sum(exp(contribution element)))
## target += log_sum_exp(contributions);
##
## }
## }
##
## generated quantities {
##
## real log_f[grid_length];
##
## for (g in 1:grid_length) {
## // Definiting here avoids reporting of these intermediates.
## real contributions[H];
## real grid_value;
##
## grid_value = grid_min + grid_step * (g - 1);
## for (h in 1:H) {
## contributions[h] = log(Pi[h]) + normal_lpdf(grid_value | mu[h], sigma[h]);
## }
##
## log_f[g] = log_sum_exp(contributions);
## }
##
## }
##
## Fit the H = 4 mixture with the ordered-mu parameterization (12 chains).
## Same hyperparameters as the unordered H = 4 fit; the ordering of mu is
## an attempt to break the label-switching symmetry.
stan_data_ordered_mu <- list(
  ## Inverse-gamma(alpha, beta) prior on sigma^2
  alpha = 1e-3,
  beta = 1e-3,
  ## Normal(m, s^2) prior on mu
  m = 0,
  s_squared = 1e3,
  ## Observed data
  n = nrow(galaxy),
  y = galaxy$k_speed,
  ## Dirichlet concentration (5 per cluster) and cluster count
  dirichlet_alpha = 20,
  H = 4,
  ## Grid for density evaluation in generated quantities
  grid_max = grid_max,
  grid_min = grid_min,
  grid_length = grid_length
)
normal_fixed_mixture_ordered_mu_stan_fit4 <-
  rstan::stan(model_code = normal_fixed_mixture_ordered_mu_stan_code,
              data = stan_data_ordered_mu,
              chains = 12)
## Warning: There were 5 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: There were 171 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
## Posterior summary for the ordered-mu fit; Rhat for mu[2] and several
## sigma entries is still far above 1, so ordering alone did not fix mixing.
print_relevant_pars(normal_fixed_mixture_ordered_mu_stan_fit4)
## Inference for Stan model: 1a37d3991c5cd122dafa78b99a6fc985.
## 12 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## mu[1] 9.31 0.49 1.56 1.78 9.51 9.67 9.81 10.14 10 2.05
## mu[2] 18.74 1.11 2.82 9.68 19.46 19.72 19.87 20.19 6 3.94
## mu[3] 21.56 0.46 1.31 19.55 19.97 22.06 22.69 23.27 8 2.16
## mu[4] 23.94 0.41 2.01 21.95 22.75 23.21 24.59 29.65 24 1.22
## sigma_squared[1] 314.75 237.65 19445.28 0.06 0.16 0.24 0.43 1061.11 6695 1.00
## sigma_squared[2] 22.67 15.15 94.70 0.12 0.32 0.48 1.25 148.65 39 1.11
## sigma_squared[3] 9.83 6.31 21.38 0.21 0.57 1.53 4.20 66.13 11 1.57
## sigma_squared[4] 22.04 6.96 39.62 0.69 1.81 5.68 33.09 92.60 32 1.15
## Pi[1] 0.11 0.00 0.03 0.06 0.09 0.11 0.14 0.19 2646 1.01
## Pi[2] 0.27 0.03 0.10 0.09 0.19 0.28 0.35 0.45 11 1.50
## Pi[3] 0.33 0.02 0.10 0.14 0.26 0.32 0.39 0.55 23 1.21
## Pi[4] 0.29 0.04 0.12 0.09 0.19 0.28 0.38 0.54 11 1.56
## sigma[1] 3.11 3.35 17.47 0.25 0.39 0.49 0.65 32.57 27 1.16
## sigma[2] 2.60 1.35 3.99 0.35 0.56 0.69 1.12 12.19 9 2.00
## sigma[3] 2.16 0.80 2.27 0.46 0.76 1.24 2.05 8.13 8 2.36
## sigma[4] 3.76 0.85 2.81 0.83 1.35 2.38 5.75 9.62 11 1.67
## lp__ -235.17 1.13 4.02 -244.19 -237.45 -234.53 -232.19 -229.10 13 1.42
##
## Samples were drawn using NUTS(diag_e) at Sat Apr 27 06:30:56 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
## Diagnostic plots for the ordered-mu fit.
traceplot_all(normal_fixed_mixture_ordered_mu_stan_fit4)
pairs_plot_all(normal_fixed_mixture_ordered_mu_stan_fit4)
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Posterior density draws for the ordered-mu fit.
plot_draws(normal_fixed_mixture_ordered_mu_stan_fit4)
## Load the alternative parameterization: mu unordered, but the cluster
## probabilities Pi derived from a positive_ordered gamma vector
## (ordered-Pi trick; see the mc-stan.org discourse link in the code).
normal_fixed_mixture_ordered_Pi_stan_code <- biostan::read_stan_file("./bayesianideas_density_normal_fixed_mixture_ordered_Pi.stan")
biostan::print_stan_code(normal_fixed_mixture_ordered_Pi_stan_code, section = NULL)
## data {
## // Hyperparameters
## real alpha;
## real beta;
## real m;
## real s_squared;
## real<lower=0> dirichlet_alpha;
##
## // Define variables in data
## // Number of observations (an integer)
## int<lower=0> n;
## // Outcome (a real vector of length n)
## real y[n];
## // Number of latent clusters
## int<lower=1> H;
##
## // Grid evaluation
## real grid_max;
## real grid_min;
## int<lower=1> grid_length;
## }
##
## transformed data {
## real s;
## real grid_step;
##
## s = sqrt(s_squared);
## grid_step = (grid_max - grid_min) / (grid_length - 1);
## }
##
## parameters {
## // Define parameters to estimate
## // Population mean (a real number)
## vector[H] mu;
## // Population variance (a positive real number)
## real<lower=0> sigma_squared[H];
## // https://discourse.mc-stan.org/t/ordered-simplex/1835
## // Unnormalized cluster relative size
## positive_ordered[H] lambda;
## }
##
## transformed parameters {
## // Population standard deviation (a positive real number)
## real<lower=0> sigma[H] = sqrt(sigma_squared);
## // Cluster probability
## simplex[H] Pi = lambda / sum(lambda);
##
## // Standard deviation (derived from variance)
## // sigma = sqrt(sigma_squared);
## // Cluster probability
## // Pi = lambda / sum(lambda);
## }
##
## model {
## // Temporary vector for loop use. Need to come first before priors.
## real contributions[H];
##
## // Prior part of Bayesian inference
## // All vectorized
## // Mean
## mu ~ normal(m, s);
## // sigma^2 has inverse gamma (alpha = 1, beta = 1) prior
## sigma_squared ~ inv_gamma(alpha, beta);
## // cluster probability vector
## /* Pi ~ dirichlet(rep_vector(dirichlet_alpha / H, H)); */
## target += gamma_lpdf(lambda | rep_vector(dirichlet_alpha / H, H), 1);
##
## // Likelihood part of Bayesian inference
## // Outcome model N(mu, sigma^2) (use SD rather than Var)
## for (i in 1:n) {
## // Loop over individuals
##
## for (h in 1:H) {
## // Loop over clusters within each individual
## // Log likelihood contributions log(Pi[h] * N(y[i] | mu[h],sigma[h]))
## contributions[h] = log(Pi[h]) + normal_lpdf(y[i] | mu[h], sigma[h]);
## }
##
## // log(sum(exp(contribution element)))
## target += log_sum_exp(contributions);
##
## }
## }
##
## generated quantities {
##
## real log_f[grid_length];
##
## for (g in 1:grid_length) {
## // Definiting here avoids reporting of these intermediates.
## real contributions[H];
## real grid_value;
##
## grid_value = grid_min + grid_step * (g - 1);
## for (h in 1:H) {
## contributions[h] = log(Pi[h]) + normal_lpdf(grid_value | mu[h], sigma[h]);
## }
##
## log_f[g] = log_sum_exp(contributions);
## }
##
## }
##
## Fit the H = 4 mixture with the ordered-Pi parameterization (12 chains).
## Hyperparameters are identical to the previous H = 4 fits; here the
## simplex Pi is ordered (via positive_ordered lambda) instead of mu.
stan_data_ordered_Pi <- list(
  ## Inverse-gamma(alpha, beta) prior on sigma^2
  alpha = 1e-3,
  beta = 1e-3,
  ## Normal(m, s^2) prior on mu
  m = 0,
  s_squared = 1e3,
  ## Observed data
  n = nrow(galaxy),
  y = galaxy$k_speed,
  ## Dirichlet concentration (5 per cluster) and cluster count
  dirichlet_alpha = 20,
  H = 4,
  ## Grid for density evaluation in generated quantities
  grid_max = grid_max,
  grid_min = grid_min,
  grid_length = grid_length
)
normal_fixed_mixture_ordered_Pi_stan_fit4 <-
  rstan::stan(model_code = normal_fixed_mixture_ordered_Pi_stan_code,
              data = stan_data_ordered_Pi,
              chains = 12)
## Warning: There were 596 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: Examine the pairs() plot to diagnose sampling problems
## Posterior summary for the ordered-Pi fit; overflowing sigma_squared[1]
## draws (Inf sd, NaN Rhat) indicate the sampler still visits degenerate
## regions under the vague inverse-gamma prior.
print_relevant_pars(normal_fixed_mixture_ordered_Pi_stan_fit4)
## Inference for Stan model: 8442f443c45aac0def9566d268278405.
## 12 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## mu[1] 1.629000e+01 3.02 1.230000e+01 -11.98 9.70 19.44 23.56 3.352000e+01 17 1.32
## mu[2] 1.991000e+01 1.95 5.120000e+00 9.52 19.62 20.00 22.95 2.792000e+01 7 3.12
## mu[3] 1.765000e+01 2.34 5.840000e+00 9.41 9.84 19.81 22.59 2.541000e+01 6 5.93
## mu[4] 2.189000e+01 0.37 1.380000e+00 19.03 21.35 21.97 22.64 2.462000e+01 14 1.43
## sigma_squared[1] 5.589569e+303 NaN Inf 0.07 0.19 0.41 26.90 1.159897e+169 NaN NaN
## sigma_squared[2] 9.900000e+00 6.37 2.373000e+01 0.08 0.25 0.54 2.39 6.947000e+01 14 1.40
## sigma_squared[3] 6.000000e+00 4.81 1.383000e+01 0.10 0.26 0.49 1.85 4.562000e+01 8 1.95
## sigma_squared[4] 1.324000e+01 6.86 1.855000e+01 0.42 2.51 4.34 19.33 6.746000e+01 7 2.41
## Pi[1] 1.200000e-01 0.01 4.000000e-02 0.04 0.09 0.11 0.14 1.900000e-01 25 1.17
## Pi[2] 1.900000e-01 0.02 5.000000e-02 0.10 0.15 0.19 0.24 2.900000e-01 11 1.47
## Pi[3] 2.500000e-01 0.02 6.000000e-02 0.14 0.20 0.27 0.30 3.600000e-01 9 1.71
## Pi[4] 4.400000e-01 0.03 1.000000e-01 0.30 0.36 0.41 0.51 6.600000e-01 13 1.39
## sigma[1] 1.115587e+150 NaN 7.475821e+151 0.26 0.44 0.64 5.19 3.405717e+84 NaN 1.00
## sigma[2] 1.930000e+00 0.82 2.480000e+00 0.29 0.50 0.73 1.55 8.330000e+00 9 1.96
## sigma[3] 1.550000e+00 0.73 1.900000e+00 0.32 0.51 0.70 1.36 6.750000e+00 7 3.10
## sigma[4] 2.970000e+00 0.80 2.090000e+00 0.65 1.59 2.08 4.40 8.210000e+00 7 2.91
## lp__ -2.189600e+02 1.93 5.560000e+00 -234.57 -220.95 -217.93 -215.33 -2.114200e+02 8 1.92
##
## Samples were drawn using NUTS(diag_e) at Sat Apr 27 06:32:22 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
## Diagnostic plots for the ordered-Pi fit.
traceplot_all(normal_fixed_mixture_ordered_Pi_stan_fit4)
pairs_plot_all(normal_fixed_mixture_ordered_Pi_stan_fit4)
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in (function (x, y = NULL, nbin = 128, bandwidth, colramp = colorRampPalette(c("white", : NAs introduced by
## coercion to integer range
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, : Binning grid too coarse for current
## (small) bandwidth: consider increasing 'gridsize'
## Posterior density draws for the ordered-Pi fit.
plot_draws(normal_fixed_mixture_ordered_Pi_stan_fit4)
## Load the variant with ordered mu and a half-Cauchy prior directly on
## sigma (replacing the inverse-gamma prior on sigma^2); printed below.
normal_fixed_mixture_half_cauchy_stan_code <- biostan::read_stan_file("./bayesianideas_density_normal_fixed_mixture_half_cauchy.stan")
biostan::print_stan_code(normal_fixed_mixture_half_cauchy_stan_code, section = NULL)
## data {
## // Hyperparameters
## // For sigma
## // https://en.wikipedia.org/wiki/Cauchy_distribution
## real location;
## real scale;
## // For mu
## real m;
## real s_squared;
## real<lower=0> dirichlet_alpha;
##
## // Define variables in data
## // Number of observations (an integer)
## int<lower=0> n;
## // Outcome (a real vector of length n)
## real y[n];
## // Number of latent clusters
## int<lower=1> H;
##
## // Grid evaluation
## real grid_max;
## real grid_min;
## int<lower=1> grid_length;
## }
##
## transformed data {
## real s;
## real grid_step;
##
## s = sqrt(s_squared);
## grid_step = (grid_max - grid_min) / (grid_length - 1);
## }
##
## parameters {
## // Define parameters to estimate
## // Population mean (a real number)
## ordered[H] mu;
## // Population variance (a positive real number)
## real<lower=0> sigma[H];
## // Cluster probability
## simplex[H] Pi;
## }
##
## transformed parameters {
## }
##
## model {
## // Temporary vector for loop use. Need to come first before priors.
## real contributions[H];
##
## // Prior part of Bayesian inference
## // All vectorized
## // Mean
## mu ~ normal(m, s);
## // sigma has half-cauchy (alpha = 1, beta = 1) prior
## sigma ~ cauchy(location, scale);
## // cluster probability vector
## Pi ~ dirichlet(rep_vector(dirichlet_alpha / H, H));
##
## // Likelihood part of Bayesian inference
## // Outcome model N(mu, sigma^2) (use SD rather than Var)
## for (i in 1:n) {
## // Loop over individuals
##
## for (h in 1:H) {
## // Loop over clusters within each individual
## // Log likelihood contributions log(Pi[h] * N(y[i] | mu[h],sigma[h]))
## contributions[h] = log(Pi[h]) + normal_lpdf(y[i] | mu[h], sigma[h]);
## }
##
## // log(sum(exp(contribution element)))
## target += log_sum_exp(contributions);
##
## }
## }
##
## generated quantities {
##
## real log_f[grid_length];
##
## for (g in 1:grid_length) {
## // Definiting here avoids reporting of these intermediates.
## real contributions[H];
## real grid_value;
##
## grid_value = grid_min + grid_step * (g - 1);
## for (h in 1:H) {
## contributions[h] = log(Pi[h]) + normal_lpdf(grid_value | mu[h], sigma[h]);
## }
##
## log_f[g] = log_sum_exp(contributions);
## }
##
## }
##
## Fit the H = 4 mixture with ordered mu and a half-Cauchy(0, 2) prior on
## sigma (12 chains). This replaces the vague inverse-gamma prior on
## sigma^2 that produced overflowing variance draws above.
stan_data_half_cauchy <- list(
  ## Half-Cauchy prior on sigma: location and scale
  location = 0,
  scale = 2,
  ## Normal(m, s^2) prior on mu
  m = 0,
  s_squared = 1e3,
  ## Observed data
  n = nrow(galaxy),
  y = galaxy$k_speed,
  ## Dirichlet concentration (5 per cluster) and cluster count
  dirichlet_alpha = 20,
  H = 4,
  ## Grid for density evaluation in generated quantities
  grid_max = grid_max,
  grid_min = grid_min,
  grid_length = grid_length
)
normal_fixed_mixture_half_cauchy_stan_fit4 <-
  rstan::stan(model_code = normal_fixed_mixture_half_cauchy_stan_code,
              data = stan_data_half_cauchy,
              chains = 12)
## Warning: There were 5 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: Examine the pairs() plot to diagnose sampling problems
## Built-in HMC diagnostics (divergences, tree depth, energy) for the
## half-Cauchy fit.
check_hmc_diagnostics(normal_fixed_mixture_half_cauchy_stan_fit4)
##
## Divergences:
##
## Tree depth:
##
## Energy:
## Summarize only the interpretable parameters; sigma_squared no longer
## exists in this parameterization, hence the explicit pars argument.
print_relevant_pars(normal_fixed_mixture_half_cauchy_stan_fit4,
pars = c("mu","Pi","sigma","lp__"))
## Inference for Stan model: 487c8d056b9f381c7c5598bef442b3d5.
## 12 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## mu[1] 9.73 0.01 0.24 9.26 9.58 9.72 9.87 10.22 1125 1.02
## mu[2] 19.73 0.13 0.57 18.36 19.66 19.79 19.93 20.34 20 1.26
## mu[3] 22.22 0.31 0.94 19.74 21.90 22.52 22.86 23.34 9 1.70
## mu[4] 24.66 0.55 2.26 22.19 23.02 23.93 25.70 30.45 17 1.29
## Pi[1] 0.11 0.00 0.03 0.06 0.09 0.11 0.13 0.18 2927 1.01
## Pi[2] 0.32 0.02 0.09 0.14 0.26 0.33 0.38 0.46 18 1.23
## Pi[3] 0.33 0.02 0.10 0.14 0.26 0.32 0.39 0.55 24 1.23
## Pi[4] 0.24 0.03 0.10 0.09 0.16 0.22 0.30 0.47 13 1.44
## sigma[1] 0.58 0.00 0.24 0.30 0.43 0.54 0.67 1.18 2418 1.01
## sigma[2] 1.37 0.89 2.29 0.40 0.60 0.70 0.83 9.36 7 3.22
## sigma[3] 2.42 0.71 2.17 0.60 1.12 1.49 2.33 8.02 9 2.18
## sigma[4] 4.85 1.09 3.09 0.86 1.64 4.96 6.37 11.98 8 2.25
## lp__ -235.81 0.53 3.17 -243.19 -237.55 -235.35 -233.58 -230.82 36 1.12
##
## Samples were drawn using NUTS(diag_e) at Sat Apr 27 06:33:37 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
## Diagnostic plots for the half-Cauchy fit, restricted to mu, Pi, sigma,
## and lp__.
traceplot_all(normal_fixed_mixture_half_cauchy_stan_fit4,
pars = c("mu","Pi","sigma","lp__"))
## NOTE(review): the call below errors (see output) because pars appears
## to be handled one element at a time and "lp__" alone yields a single
## column, which base pairs() rejects -- presumably a bug in
## pairs_plot_all(); verify against its definition.
pairs_plot_all(normal_fixed_mixture_half_cauchy_stan_fit4,
pars = c("mu","Pi","sigma","lp__"))
## Error in pairs.default(x = structure(c(-236.856315027416, -233.670884234658, : only one column in the argument to 'pairs'
plot_draws(normal_fixed_mixture_half_cauchy_stan_fit4)
## Record R and package versions for reproducibility.
print(sessionInfo())
## R version 3.5.2 (2018-12-20)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.4
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] DPpackage_1.1-7.4 rstan_2.18.2 StanHeaders_2.18.1 forcats_0.3.0 stringr_1.4.0
## [6] dplyr_0.8.0 purrr_0.3.0 readr_1.3.1 tidyr_0.8.2 tibble_2.0.1
## [11] ggplot2_3.1.0 tidyverse_1.2.1 doRNG_1.7.1 rngtools_1.3.1 pkgmaker_0.27
## [16] registry_0.5 doParallel_1.0.14 iterators_1.0.10 foreach_1.4.4 knitr_1.21
##
## loaded via a namespace (and not attached):
## [1] minqa_1.2.4 colorspace_1.4-0 ggridges_0.5.1 rsconnect_0.8.13
## [5] ggstance_0.3.1 markdown_0.9 base64enc_0.1-3 rstudioapi_0.9.0
## [9] svUnit_0.7-12 DT_0.5 fansi_0.4.0 lubridate_1.7.4
## [13] xml2_1.2.0 codetools_0.2-16 splines_3.5.2 shinythemes_1.1.2
## [17] bayesplot_1.6.0 jsonlite_1.6 nloptr_1.2.1 broom_0.5.1
## [21] shiny_1.2.0 compiler_3.5.2 httr_1.4.0 backports_1.1.3
## [25] assertthat_0.2.0 Matrix_1.2-15 lazyeval_0.2.1 cli_1.0.1
## [29] later_0.8.0 htmltools_0.3.6 prettyunits_1.0.2 tools_3.5.2
## [33] igraph_1.2.4 coda_0.19-2 gtable_0.2.0 glue_1.3.0
## [37] reshape2_1.4.3 Rcpp_1.0.0 cellranger_1.1.0 nlme_3.1-137
## [41] crosstalk_1.0.0 xfun_0.4 ps_1.3.0 lme4_1.1-20
## [45] rvest_0.3.2 mime_0.6 miniUI_0.1.1.1 tidybayes_1.0.3
## [49] gtools_3.8.1 MASS_7.3-51.1 zoo_1.8-4 scales_1.0.0
## [53] rstanarm_2.18.2 colourpicker_1.0 hms_0.4.2 promises_1.0.1
## [57] inline_0.3.15 shinystan_2.5.0 yaml_2.2.0 gridExtra_2.3
## [61] loo_2.0.0 stringi_1.3.1 dygraphs_1.1.1.6 pkgbuild_1.0.2
## [65] bibtex_0.4.2 rlang_0.3.1 pkgconfig_2.0.2 matrixStats_0.54.0
## [69] evaluate_0.13 lattice_0.20-38 rstantools_1.5.1 htmlwidgets_1.3
## [73] labeling_0.3 processx_3.2.1 tidyselect_0.2.5 biostan_0.1.0
## [77] plyr_1.8.4 magrittr_1.5 R6_2.4.0 generics_0.0.2
## [81] pillar_1.3.1 haven_2.0.0 withr_2.1.2 xts_0.11-2
## [85] survival_2.43-3 modelr_0.1.3 crayon_1.3.4 arrayhelpers_1.0-20160527
## [89] KernSmooth_2.23-15 utf8_1.1.4 rmarkdown_1.11 grid_3.5.2
## [93] readxl_1.2.0 callr_3.1.1 threejs_0.3.1 digest_0.6.18
## [97] xtable_1.8-3 httpuv_1.4.5.1 stats4_3.5.2 munsell_0.5.0
## [101] shinyjs_1.0
## Report total wall-clock time for the document and the parallel backend
## that ran the chains. cat() is kept (rather than paste0 + as.character)
## so the numeric elapsed time prints with the session's default digits.
finish_time <- Sys.time()
elapsed <- difftime(finish_time, start_time, units = "auto")
cat("Started ", as.character(start_time), "\n",
    "Finished ", as.character(finish_time), "\n",
    "Time difference of ", elapsed, " ", attr(elapsed, "units"), "\n",
    "Used ", foreach::getDoParWorkers(), " cores\n",
    "Used ", foreach::getDoParName(), " as backend\n",
    sep = "")
## Started 2019-04-27 06:22:03
## Finished 2019-04-27 06:34:00
## Time difference of 11.9426 mins
## Used 12 cores
## Used doParallelMC as backend